// Origin: gorgonia.org/gorgonia@v0.9.17, cuda modules/src/elembinop.cu
//
// Element-wise binary kernels for double (f64) and float (f32) operands.
// Every kernel is exported with C linkage as <name>_<shape>_<type>, taking
// (type* A, type* B, int size). The <shape> part selects the access pattern:
//
//   vv : A[i] = A[i] op B[i]
//   vs : A[i] = A[i] op B[0]
//   sv : B[i] = A[0] op B[i]      (the result is written to B, not A)
//   ss : A[0] = A[0] op B[0]
//
// Comparison kernels (gt, gte, lt, lte, eq, ne) store the boolean outcome
// converted to the element type, i.e. 1 for true and 0 for false.

#define _USE_MATH_DEFINES
#include <math.h>

/*
 * THREADID declares two locals in the enclosing kernel:
 *   blockId - the block's position in the grid, flattened over x, y and z
 *   idx     - the thread's position in the whole launch, flattened over the
 *             grid and over the x, y and z dimensions of the block
 * Both are `int`, so idx is only meaningful while the launch holds no more
 * threads than an int can count.
 */
#define THREADID \
    int blockId = blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;\
    int idx = blockId * (blockDim.x * blockDim.y * blockDim.z) + (threadIdx.z * (blockDim.x * blockDim.y)) + (threadIdx.y * blockDim.x) + threadIdx.x;

/*
 * CHECKSIZE retires every thread whose flattened index lies at or beyond
 * `size`. Must follow THREADID; expects an `int size` kernel parameter.
 */
#define CHECKSIZE \
    if (idx >= size) { \
        return; \
    }

/*
 * FIRSTTHREAD retires every thread except the one with idx == 0.
 * The scalar-scalar kernels read and write A[0] only; without this guard
 * each thread below `size` would repeat the read-modify-write on the same
 * element, and a launch with size > 1 could apply the operation several
 * times. Must follow THREADID.
 */
#define FIRSTTHREAD \
    if (idx != 0) { \
        return; \
    }

/*
 * Operator kernels. Parameters shared by the four macros below:
 *   name - prefix of the generated kernel name
 *   t    - type tag appended to the kernel name (f64 / f32)
 *   type - element type of A and B
 *   op   - infix binary operator placed between the two operands
 */

/* vector (op) vector: A[idx] = A[idx] op B[idx] */
#define VVBINOP(name, t, type, op)\
    __global__ void name ##_vv_ ##t(type* A, type* B, int size) { \
    THREADID \
    CHECKSIZE \
    A[idx] = A[idx] op B[idx];}

/* vector (op) scalar: A[idx] = A[idx] op B[0] */
#define VSBINOP(name, t, type, op)\
    __global__ void name ##_vs_ ##t(type* A, type* B, int size) { \
    THREADID \
    CHECKSIZE \
    A[idx] = A[idx] op B[0];}

/* scalar (op) vector: B[idx] = A[0] op B[idx] -- note the result goes to B */
#define SVBINOP(name, t, type, op)\
    __global__ void name ##_sv_ ##t(type* A, type* B, int size) { \
    THREADID \
    CHECKSIZE \
    B[idx] = A[0] op B[idx];}

/* scalar (op) scalar: A[0] = A[0] op B[0], performed by exactly one thread */
#define SSBINOP(name, t, type, op)\
    __global__ void name ##_ss_ ##t(type* A, type* B, int size) { \
    THREADID \
    CHECKSIZE \
    FIRSTTHREAD \
    A[0] = A[0] op B[0];}

/* VECTOR-VECTOR BIN OP */

extern "C" { VVBINOP(add, f64, double, +) }
extern "C" { VVBINOP(add, f32, float, +) }

extern "C" { VVBINOP(sub, f64, double, -) }
extern "C" { VVBINOP(sub, f32, float, -) }

extern "C" { VVBINOP(mul, f64, double, *) }
extern "C" { VVBINOP(mul, f32, float, *) }

extern "C" { VVBINOP(div, f64, double, /) }
extern "C" { VVBINOP(div, f32, float, /) }

extern "C" { VVBINOP(gt, f64, double, >) }
extern "C" { VVBINOP(gt, f32, float, >) }

extern "C" { VVBINOP(gte, f64, double, >=) }
extern "C" { VVBINOP(gte, f32, float, >=) }

extern "C" { VVBINOP(lt, f64, double, <) }
extern "C" { VVBINOP(lt, f32, float, <) }

extern "C" { VVBINOP(lte, f64, double, <=) }
extern "C" { VVBINOP(lte, f32, float, <=) }

extern "C" { VVBINOP(eq, f64, double, ==) }
extern "C" { VVBINOP(eq, f32, float, ==) }

extern "C" { VVBINOP(ne, f64, double, !=) }
extern "C" { VVBINOP(ne, f32, float, !=) }


/* VECTOR-SCALAR BIN OP */

extern "C" { VSBINOP(add, f64, double, +) }
extern "C" { VSBINOP(add, f32, float, +) }

extern "C" { VSBINOP(sub, f64, double, -) }
extern "C" { VSBINOP(sub, f32, float, -) }

extern "C" { VSBINOP(mul, f64, double, *) }
extern "C" { VSBINOP(mul, f32, float, *) }

extern "C" { VSBINOP(div, f64, double, /) }
extern "C" { VSBINOP(div, f32, float, /) }

extern "C" { VSBINOP(gt, f64, double, >) }
extern "C" { VSBINOP(gt, f32, float, >) }

extern "C" { VSBINOP(gte, f64, double, >=) }
extern "C" { VSBINOP(gte, f32, float, >=) }

extern "C" { VSBINOP(lt, f64, double, <) }
extern "C" { VSBINOP(lt, f32, float, <) }

extern "C" { VSBINOP(lte, f64, double, <=) }
extern "C" { VSBINOP(lte, f32, float, <=) }

extern "C" { VSBINOP(eq, f64, double, ==) }
extern "C" { VSBINOP(eq, f32, float, ==) }

extern "C" { VSBINOP(ne, f64, double, !=) }
extern "C" { VSBINOP(ne, f32, float, !=) }

/* SCALAR-VECTOR BIN OP */

extern "C" { SVBINOP(add, f64, double, +) }
extern "C" { SVBINOP(add, f32, float, +) }

extern "C" { SVBINOP(sub, f64, double, -) }
extern "C" { SVBINOP(sub, f32, float, -) }

extern "C" { SVBINOP(mul, f64, double, *) }
extern "C" { SVBINOP(mul, f32, float, *) }

extern "C" { SVBINOP(div, f64, double, /) }
extern "C" { SVBINOP(div, f32, float, /) }

extern "C" { SVBINOP(gt, f64, double, >) }
extern "C" { SVBINOP(gt, f32, float, >) }

extern "C" { SVBINOP(gte, f64, double, >=) }
extern "C" { SVBINOP(gte, f32, float, >=) }

extern "C" { SVBINOP(lt, f64, double, <) }
extern "C" { SVBINOP(lt, f32, float, <) }

extern "C" { SVBINOP(lte, f64, double, <=) }
extern "C" { SVBINOP(lte, f32, float, <=) }

extern "C" { SVBINOP(eq, f64, double, ==) }
extern "C" { SVBINOP(eq, f32, float, ==) }

extern "C" { SVBINOP(ne, f64, double, !=) }
extern "C" { SVBINOP(ne, f32, float, !=) }

/* SCALAR-SCALAR BIN OP */

extern "C" { SSBINOP(add, f64, double, +) }
extern "C" { SSBINOP(add, f32, float, +) }

extern "C" { SSBINOP(sub, f64, double, -) }
extern "C" { SSBINOP(sub, f32, float, -) }

extern "C" { SSBINOP(mul, f64, double, *) }
extern "C" { SSBINOP(mul, f32, float, *) }

extern "C" { SSBINOP(div, f64, double, /) }
extern "C" { SSBINOP(div, f32, float, /) }

extern "C" { SSBINOP(gt, f64, double, >) }
extern "C" { SSBINOP(gt, f32, float, >) }

extern "C" { SSBINOP(gte, f64, double, >=) }
extern "C" { SSBINOP(gte, f32, float, >=) }

extern "C" { SSBINOP(lt, f64, double, <) }
extern "C" { SSBINOP(lt, f32, float, <) }

extern "C" { SSBINOP(lte, f64, double, <=) }
extern "C" { SSBINOP(lte, f32, float, <=) }

extern "C" { SSBINOP(eq, f64, double, ==) }
extern "C" { SSBINOP(eq, f32, float, ==) }

extern "C" { SSBINOP(ne, f64, double, !=) }
extern "C" { SSBINOP(ne, f32, float, !=) }

/* FUNCTION BIN OP */

/*
 * Function kernels. Same parameters and access patterns as the operator
 * macros above, except that `op` is the name of a two-argument function
 * called as op(lhs, rhs) instead of an infix operator.
 */

/* vector, vector: A[idx] = op(A[idx], B[idx]) */
#define VVFNBINOP(name, t, type, op)\
    __global__ void name ##_vv_ ##t(type* A, type* B, int size) { \
    THREADID \
    CHECKSIZE \
    A[idx] = op(A[idx], B[idx]);}

/* vector, scalar: A[idx] = op(A[idx], B[0]) */
#define VSFNBINOP(name, t, type, op)\
    __global__ void name ##_vs_ ##t(type* A, type* B, int size) { \
    THREADID \
    CHECKSIZE \
    A[idx] = op(A[idx], B[0]);}

/* scalar, vector: B[idx] = op(A[0], B[idx]) -- note the result goes to B */
#define SVFNBINOP(name, t, type, op)\
    __global__ void name ##_sv_ ##t(type* A, type* B, int size) { \
    THREADID \
    CHECKSIZE \
    B[idx] = op(A[0], B[idx]);}

/* scalar, scalar: A[0] = op(A[0], B[0]), performed by exactly one thread */
#define SSFNBINOP(name, t, type, op)\
    __global__ void name ##_ss_ ##t(type* A, type* B, int size) { \
    THREADID \
    CHECKSIZE \
    FIRSTTHREAD \
    A[0] = op(A[0], B[0]);}

extern "C" { VVFNBINOP(pow, f64, double, pow) }
extern "C" { VVFNBINOP(pow, f32, float, powf) }
extern "C" { VSFNBINOP(pow, f64, double, pow) }
extern "C" { VSFNBINOP(pow, f32, float, powf) }
extern "C" { SVFNBINOP(pow, f64, double, pow) }
extern "C" { SVFNBINOP(pow, f32, float, powf) }
extern "C" { SSFNBINOP(pow, f64, double, pow) }
extern "C" { SSFNBINOP(pow, f32, float, powf) }

// The mod kernels below are disabled.
// NOTE(review): modf/modff take a pointer as their second argument and split
// a value into integral and fractional parts, so they do not fit the
// op(lhs, rhs) shape used by the *FNBINOP macros. If a remainder kernel is
// what was intended, fmod/fmodf are the two-operand functions -- confirm
// before enabling.
/*
extern "C" { VVFNBINOP(mod, f64, double, modf) }
extern "C" { VVFNBINOP(mod, f32, float, modff) }
extern "C" { VSFNBINOP(mod, f64, double, modf) }
extern "C" { VSFNBINOP(mod, f32, float, modff) }
extern "C" { SVFNBINOP(mod, f64, double, modf) }
extern "C" { SVFNBINOP(mod, f32, float, modff) }
extern "C" { SSFNBINOP(mod, f64, double, modf) }
extern "C" { SSFNBINOP(mod, f32, float, modff) }
*/